####################################################################################################
### Title: People Consistently View Elections and Civil Liberties as Key Components of Democracy ###
### Content: Main analysis                                                                       ###
### Date: August 24, 2024                                                                        ###
####################################################################################################

### Set-up ----
## Clean the working environment and set the working directory
rm(list = ls())
setwd("~/Desktop/Science_Replication/all_countries")

## Load the required packages
library(ggplot2)         # version 3.4.3
library(tidyverse)       # version 2.0.0
library(estimatr)        # version 1.0.0
library(cregg)           # version 0.4.0
library(expss)           # version 0.11.4
library(ggrepel)         # version 0.9.1
library(cowplot)         # version 1.1.1
library(survey)          # version 4.1.1
library(marginaleffects) # version 0.18.0
library(modelsummary)    # version 0.9.4
library(texreg)          # version 1.37.5

## Read the cleaned datasets
df_US <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_US.csv")
df_JP <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_JP.csv")
df_EG <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_EG.csv")
df_IN <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_IN.csv")
df_IT <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_IT.csv")
df_TH <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_TH.csv")

## Merge the datasets
df_cj <- bind_rows(df_US, df_JP, df_EG, df_IN, df_IT, df_TH)

## Reorder the factors
reorder_levels <- function(df_cj){
  # Electoral democracy
  df_cj$election <- 
    factor(df_cj$election, 
           levels = c("Elections are not held", "Elections are biased", "Elections are free and fair"))
  
  # Liberal democracy
  df_cj$civil <- 
    factor(df_cj$civil, 
           levels = c("Civil liberties are not at all protected", "Civil liberties are weakly protected", "Civil liberties are strongly protected"))
  
  # Institutional democracy
  df_cj$leader <- 
    factor(df_cj$leader, 
           levels = c("Leader is weakly constrained", "Leader is somewhat constrained", "Leader is highly constrained"))
  
  # Populist democracy
  df_cj$populist <- 
    factor(df_cj$populist, 
           levels = c("Leader rarely follows the majority", "Leader sometimes follows the majority", "Leader frequently follows the majority"))
  
  # Loyalist democracy
  df_cj$obedient <- 
    factor(df_cj$obedient, 
           levels = c("Dissidents mostly challenge the gov't", "Dissidents occasionally obey the gov't", "Dissidents mostly obey the gov't"))
  
  # Substantive democracy - economy
  df_cj$econ <- 
    factor(df_cj$econ, 
           levels = c("Economic equality is very low", "Economic equality is somewhat low", "Economic equality is high"))
  
  # Substantive democracy - gender
  df_cj$gender <- 
    factor(df_cj$gender, 
           levels = c("Gender equality is very low", "Gender equality is somewhat low", "Gender equality is high"))
  
  # Technocratic democracy
  df_cj$expert <- 
    factor(df_cj$expert, 
           levels = c("Experts have small influence on policy", "Experts have some influence on policy", "Experts have much influence on policy"))
  
  # Direct democracy
  df_cj$direct <- 
    factor(df_cj$direct, 
           levels = c("Policies are rarely voted on", "Policies are sometimes voted on", "Policies are frequently voted on"))
  
  return(df_cj)
}

df_cj <- reorder_levels(df_cj)

# Country
df_cj$country <- factor(df_cj$country, 
                        levels = c("US", "JP", "EG", "IN", "IT", "TH"),
                        labels = c("United States", "Japan", "Egypt", "India", "Italy", "Thailand"))

## Check if there are complete ties (i.e., ALL attribute-levels are the same for a country pair)
## and remove them if exist
# Identify ties
df_cj$id.task <- paste0(df_cj$id, ".", df_cj$task)
df_wide <- df_cj %>% dplyr::select(id.task, profile, election, civil, leader, populist, obedient, econ, gender, expert, direct)
df_wide <- reshape(df_wide, idvar = "id.task", timevar = "profile", direction = "wide")
df_wide$tie <- 
  ifelse((df_wide$election.1 == df_wide$election.2) &
           (df_wide$civil.1 == df_wide$civil.2) &
           (df_wide$leader.1 == df_wide$leader.2) &
           (df_wide$populist.1 == df_wide$populist.2) &
           (df_wide$obedient.1 == df_wide$obedient.2) &
           (df_wide$econ.1 == df_wide$econ.2) &
           (df_wide$gender.1 == df_wide$gender.2) &
           (df_wide$expert.1 == df_wide$expert.2) &
           (df_wide$direct.1 == df_wide$direct.2), 
         1, 0)
table(df_wide$tie)
temp <- subset(df_wide, tie == 1) # respondent id = R_3RjV1QPR72JSMjA; task = 1

# Remove the complete tie
df_cj <- subset(df_cj, id.task != "R_3RjV1QPR72JSMjA.1")
df_TH <- subset(df_TH, id != "R_3RjV1QPR72JSMjA" | task != 1)

### Figure 1A: AMCEs with the six-country sample ----
## Format attribute labels in plots
df_cj <- apply_labels(df_cj,
                      election = "Electoral Democracy",
                      civil = "Liberal Democracy",
                      leader = "Institutional Democracy",
                      populist = "Populist Democracy",
                      obedient = "Loyalist Democracy",
                      econ = "Substantive Democracy - Economy",
                      gender = "Substantive Democracy - Gender",
                      expert = "Technocratic Democracy",
                      direct = "Direct Democracy")

attributes <- "election + civil + leader + populist + obedient + econ + gender + expert + direct"

## Bold feature labels in plots
bph <- c('bold', rep('plain', 3), 'bold', rep('plain', 3),
         'bold', rep('plain', 3), 'bold', rep('plain', 3), 
         'bold', rep('plain', 3), 'bold', rep('plain', 3),
         'bold', rep('plain', 3), 'bold', rep('plain', 3), 
         'bold', rep('plain', 3)) %>%
  rev() # reverse coding

## Create a function that indicates which estimates are statistically significant
## at the 5% level after using the BH procedure to adjust for multiple comparisons
sig1_fun <- function(data){
  data <- data %>% 
    mutate(sig = case_when(
      (p.adjust(p, method = "BH") < 0.05) == TRUE ~ 1, 
      (p.adjust(p, method = "BH") < 0.05) == FALSE ~ 0)) %>% 
    mutate(sig = factor(sig, levels = c(0, 1)))
  return(data)
}

## Estimate the AMCEs
amces <- cj(data = df_cj,
            formula = as.formula(paste("selected ~", attributes)),
            id = ~id,
            estimate = "amce")
amces <- sig1_fun(amces)
amces$sig[is.na(amces$sig)] <- 0

## Visualize the AMCEs
p <- plot(amces, legend_pos = "none", size = 1) + 
  aes(color = sig) + 
  xlab("Effect on Pr(Being Selected as More Democratic)")
p <- p + scale_color_manual(values = c("grey60", "black"))
p <- p + theme_bw(base_size = 10, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 9, colour = "black",  hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 9, colour = "black", hjust = 1, vjust = .5, face = bph),
    axis.ticks = element_line(colour = "grey50"),
    axis.title.y = element_text(size = 9, angle = 90, vjust = .01, hjust = .1),
    legend.position = "none",
    strip.text.x = element_text(size = 8),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
p_amce <- p + coord_cartesian(xlim = c(-0.02, 0.20))

### Figure 2: AMCEs by country ----
## Estimate the AMCEs
amces <- cj(data = df_cj,
            formula = as.formula(paste("selected ~", attributes)),
            id = ~id,
            estimate = "amce",
            by = ~country)
amces <- sig1_fun(amces)

## Visualize the AMCEs
p <- plot(amces, legend_pos = "none", size = 1) + 
  ggplot2::facet_wrap(~BY, ncol = 3) +
  aes(color = sig) + 
  xlab("Effect on Pr(Being Selected as More Democratic)")
p <- p + scale_color_manual(values = c("grey60", "black"))
p <- p + theme_bw(base_size = 8, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 6, colour = "black",  hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 8, colour = "black", hjust = 1, vjust = .5, face = bph),
    axis.ticks = element_line(colour = "grey50"),
    axis.title.y = element_text(size = 10, angle = 90, vjust = .01, hjust = .1),
    legend.position = "none",
    strip.text.x = element_text(size = 8),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
p <- p + coord_cartesian(xlim = c(-0.08, 0.225))
ggsave("Figure 2.pdf", width = 6, height = 8)

### Figure 3: difference in marginal means by subgroup ----
## By age
df_cj <- df_cj %>% mutate(age_bin = case_when(
  age < 40 ~ 1,
  age > 40 ~ 0
))
df_cj$age_bin <- factor(df_cj$age_bin, 0:1, c("Older", "Younger Than 40"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~age_bin)
diff_mms_age <- sig1_fun(diff_mms)

## By gender
df_cj$gender_bin <- factor(df_cj$gender_bin, levels = c("Male", "Female"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~gender_bin)
diff_mms_gender <- sig1_fun(diff_mms)

## By education
df_cj$edu_bin <- factor(df_cj$edu_bin, levels = c("No College", "College"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~edu_bin)
diff_mms_edu <- sig1_fun(diff_mms)

## By minority status
df_cj$minority_bin <- factor(df_cj$minority_bin, levels = c("Non-Minority", "Minority"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~minority_bin)
diff_mms_minority <- sig1_fun(diff_mms)

## By self-reported political ideology
df_cj$ideo_bin <- factor(df_cj$ideo_bin, levels = c("Left", "Right"), labels = c("Leftwing", "Rightwing"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~ideo_bin)
diff_mms_ideo <- sig1_fun(diff_mms)

## By geopolitical alignment
df_cj$pro_china <- factor(df_cj$pro_china, levels = c("Pro-US", "Pro-China"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~pro_china)
diff_mms_china <- sig1_fun(diff_mms)

## Combine the difference-in-MMs plots
combined <- bind_rows(diff_mms_age, diff_mms_gender, diff_mms_edu,
                      diff_mms_minority, diff_mms_ideo, diff_mms_china)
combined$BY <- 
  factor(combined$BY,
         levels = c("Younger Than 40 - Older", "Female - Male", "College - No College",
                    "Minority - Non-Minority", "Rightwing - Leftwing", "Pro-China - Pro-US"))
p <- plot(combined, legend_pos = "none", size = 1) + 
  ggplot2::facet_wrap(~BY, ncol = 3) +
  aes(color = sig) +
  xlab("Difference in Marginal Means of Pr(Being Selected as More Democratic)")
p <- p + scale_color_manual(values = c("grey60", "black"))
p <- p + theme_bw(base_size = 8, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 6, colour = "black",  hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 8, colour = "black", hjust = 1, vjust = .5, face = bph),
    axis.ticks = element_line(colour = "grey50"),
    axis.title.y = element_text(size = 10, angle = 90, vjust = .01, hjust = .1),
    legend.position = "none",
    strip.text.x = element_text(size = 8),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
p <- p + coord_cartesian(xlim = c(-0.1, 0.1))
ggsave("Figure 3.pdf", width = 6, height = 8)

### Figure S21: difference in marginal means by age based on different cutoffs ----
## Younger than 30 vs. older than 30
df_cj <- df_cj %>% mutate(age_bin_30 = case_when(
  age < 30 ~ 1,
  age > 30 ~ 0
))
df_cj$age_bin_30 <- factor(df_cj$age_bin_30, 0:1, c("Older", "Younger Than 30"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~age_bin_30)
diff_mms_age_30 <- sig1_fun(diff_mms)

## Younger than 35 vs. older than 35
df_cj <- df_cj %>% mutate(age_bin_35 = case_when(
  age < 35 ~ 1,
  age > 35 ~ 0
))
df_cj$age_bin_35 <- factor(df_cj$age_bin_35, 0:1, c("Older", "Younger Than 35"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~age_bin_35)
diff_mms_age_35 <- sig1_fun(diff_mms)

## Younger than 45 vs. older than 45
df_cj <- df_cj %>% mutate(age_bin_45 = case_when(
  age < 45 ~ 1,
  age > 45 ~ 0
))
df_cj$age_bin_45 <- factor(df_cj$age_bin_45, 0:1, c("Older", "Younger Than 45"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~age_bin_45)
diff_mms_age_45 <- sig1_fun(diff_mms)

## Younger than 50 vs. older than 50
df_cj <- df_cj %>% mutate(age_bin_50 = case_when(
  age < 50 ~ 1,
  age > 50 ~ 0
))
df_cj$age_bin_50 <- factor(df_cj$age_bin_50, 0:1, c("Older", "Younger Than 50"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~age_bin_50)
diff_mms_age_50 <- sig1_fun(diff_mms)

## Younger than 55 vs. older than 55
df_cj <- df_cj %>% mutate(age_bin_55 = case_when(
  age < 55 ~ 1,
  age > 55 ~ 0
))
df_cj$age_bin_55 <- factor(df_cj$age_bin_55, 0:1, c("Older", "Younger Than 55"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~age_bin_55)
diff_mms_age_55 <- sig1_fun(diff_mms)

## Combine the difference-in-MMs plots
combined <- bind_rows(diff_mms_age_30, diff_mms_age_35, diff_mms_age,
                      diff_mms_age_45, diff_mms_age_50, diff_mms_age_55)
combined$BY <- 
  factor(combined$BY,
         levels = c("Younger Than 30 - Older", "Younger Than 35 - Older", "Younger Than 40 - Older",
                    "Younger Than 45 - Older", "Younger Than 50 - Older", "Younger Than 55 - Older"))
p <- plot(combined, legend_pos = "none", size = 1) + 
  ggplot2::facet_wrap(~BY, ncol = 3) +
  aes(color = sig) +
  xlab("Difference in Marginal Means of Pr(Being Selected as More Democratic)")
p <- p + scale_color_manual(values = c("grey60", "black"))
p <- p + theme_bw(base_size = 8, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 6, colour = "black",  hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 8, colour = "black", hjust = 1, vjust = .5, face = bph),
    axis.ticks = element_line(colour = "grey50"),
    axis.title.y = element_text(size = 10, angle = 90, vjust = .01, hjust = .1),
    legend.position = "none",
    strip.text.x = element_text(size = 8),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
p <- p + coord_cartesian(xlim = c(-0.1, 0.1))
ggsave("Figure S21.pdf", width = 6, height = 8)

### Figures S2 and S3: AMCEs based on democratic ratings ----
## Create an outcome variable that indicates the respondent's rating of a given country
df_cj <- df_cj %>% mutate(dem_rating = case_when(
  task == 1 & profile == 1 ~ Q1.2,
  task == 1 & profile == 2 ~ Q1.3,
  task == 2 & profile == 1 ~ Q1.6,
  task == 2 & profile == 2 ~ Q1.7,
  task == 3 & profile == 1 ~ Q1.10,
  task == 3 & profile == 2 ~ Q1.11
))

## Estimate the aggregate AMCEs and visualize them
amces <- cj(data = df_cj,
            formula = as.formula(paste("dem_rating ~", attributes)),
            id = ~id,
            estimate = "amce")
amces <- sig1_fun(amces)

p <- plot(amces, legend_pos = "none", size = 1) + 
  aes(color = sig) + 
  xlab("Effect on Democratic Rating")
p <- p + scale_color_manual(values = c("grey60", "black"))
p <- p + theme_bw(base_size = 8, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 8, colour = "black",  hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 8, colour = "black", hjust = 1, vjust = .5, face = bph),
    axis.ticks = element_line(colour = "grey50"),
    axis.title.y = element_text(size = 10, angle = 90, vjust = .01, hjust = .1),
    legend.position = "none",
    strip.text.x = element_text(size = 8),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
p <- p + coord_cartesian(xlim = c(-0.15, 0.9))
ggsave("Figure S2.pdf", width = 6, height = 4)

## Estimate the AMCEs for each country and visualize them
amces <- cj(data = df_cj,
            formula = as.formula(paste("dem_rating ~", attributes)),
            id = ~id,
            estimate = "amce",
            by = ~country)
amces <- sig1_fun(amces)

p <- plot(amces, legend_pos = "none", size = 1) + 
  ggplot2::facet_wrap(~BY, ncol = 3) +
  aes(color = sig) + 
  xlab("Effect on Democratic Rating")
p <- p + scale_color_manual(values = c("grey60", "black"))
p <- p + theme_bw(base_size = 8, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 6, colour = "black",  hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 8, colour = "black", hjust = 1, vjust = .5, face = bph),
    axis.ticks = element_line(colour = "grey50"),
    axis.title.y = element_text(size = 10, angle = 90, vjust = .01, hjust = .1),
    legend.position = "none",
    strip.text.x = element_text(size = 8),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
p <- p + coord_cartesian(xlim = c(-0.3, 0.9))
ggsave("Figure S3.pdf", width = 6, height = 8)

### Figure S4: evaluate whether respondents exposed to more atypical profiles reacted differently to treatments ----
## Remove the respondent with a complete "tie"
df_cj_no_tie <- subset(df_cj, id != "R_3RjV1QPR72JSMjA")

## Flag any atypical profiles
df_cj_no_tie$atypical <- 
  ifelse((df_cj_no_tie$election == "Elections are free and fair" & df_cj_no_tie$civil == "Civil liberties are not at all protected") |
           (df_cj_no_tie$election == "Elections are not held" & df_cj_no_tie$direct == "Policies are frequently voted on") |
           (df_cj_no_tie$civil == "Civil liberties are strongly protected" & df_cj_no_tie$gender == "Gender equality is very low"),
         1, 0)
table(df_cj_no_tie$atypical)

## Count the number of atypical profiles each respondent encountered
df_cj_no_tie$atypical_count <- rep(colSums(matrix(df_cj_no_tie$atypical, nrow = 6)), 6)
table(df_cj_no_tie$atypical_count) %>% prop.table() * 100

## Dichotomize the variable by using 3 as a cutoff
df_cj_no_tie <- df_cj_no_tie %>% mutate(atypical_bin = case_when(
  atypical_count >= 0 & atypical_count <= 2 ~ 0,
  atypical_count >= 3 & atypical_count <= 6 ~ 1,
))
df_cj_no_tie$atypical_bin <- factor(df_cj_no_tie$atypical_bin, 0:1, c("Not Above 2", "Above 2 Atypical Profiles"))

## Graphical method based on differences in AMCEs
amces <- cj(data = df_cj_no_tie, 
            formula = as.formula(paste("selected ~", attributes)),
            id = ~id,
            estimate = "amce",
            by = ~atypical_bin)
amces <- sig1_fun(amces)

diff_amces <- cj(data = df_cj_no_tie, 
                 formula = as.formula(paste("selected ~", attributes)),
                 id = ~id,
                 estimate = "amce_diff",
                 by = ~atypical_bin)
diff_amces <- sig1_fun(diff_amces)

p <- plot(rbind(amces, diff_amces), legend_pos = "none", size = 2) + 
  ggplot2::facet_wrap(~BY, ncol = 3) +
  aes(color = sig) + 
  xlab("Effect on Pr(Being Selected as More Democratic)")
p <- p + scale_color_manual(values = c("grey60", "black"))
p <- p + theme_bw(base_size = 12, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 12, colour = "black",  hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 12, colour = "black", hjust = 1, vjust = .5, face = bph),
    axis.ticks = element_line(colour = "grey50"),
    axis.title.y = element_text(size = 12, angle = 90, vjust = .01, hjust = .1),
    legend.position = "none",
    strip.text.x = element_text(size = 12),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
p <- p + coord_cartesian(xlim = c(-0.225, 0.225))
ggsave("Figure S4.pdf", width = 12, height = 8)

# Formal test based on a nested model comparison test (Leeper et al. 2020)
cj_anova(data = df_cj_no_tie, 
         formula = as.formula(paste("selected ~", attributes)),
         id = ~id,
         by = ~atypical_bin) # F = 1.1338, p = 0.3077

### Figures S11-S13: differences in attribute salience by regime type ----
## Indicate the regime type
df_cj <- df_cj %>% mutate(dem_backdem = case_when(
  country == "Japan" | country == "Italy" ~ 1,
  country == "United States" | country == "India" ~ 0
))

df_cj <- df_cj %>% mutate(dem_nondem = case_when(
  country == "Japan" | country == "Italy" ~ 1,
  country == "Egypt" | country == "Thailand" ~ 0
))

df_cj <- df_cj %>% mutate(nondem_backdem = case_when(
  country == "Egypt" | country == "Thailand" ~ 1,
  country == "United States" | country == "India" ~ 0
))

## Reshape data
df_salience <- df_cj %>% 
  select(1:13, "dem_backdem", "dem_nondem", "nondem_backdem") %>%
  mutate_if(is.factor, as.character) %>% 
  gather("attribute", "level", 4:12)

## Rename attributes
df_salience$attribute <- 
  case_match(df_salience$attribute, 
             "election" ~ "Electoral",
             "civil" ~ "Liberal",
             "leader" ~ "Institutional",
             "populist" ~ "Populist",
             "obedient" ~ "Loyalist",
             "econ" ~ "Economic",
             "gender" ~ "Gender",
             "expert" ~ "Technocratic",
             "direct" ~ "Direct")

## Bootstrapping by clusters (i.e., respondents)
set.seed(1234567)
unique_id <- df_salience %>% 
  select(id) %>% 
  distinct() %>% 
  pull()

# Bootstrap 1000 times (note: this step will take several minutes)
df_bootstrapped <- data.frame(
  draws = rep(1:1000, length(unique_id)), 
  id = sample(unique_id, length(unique_id) * 1000, replace = TRUE)) %>% 
  left_join(df_salience, by = "id")

## Calculate the difference-in-salience scores for democracies vs. backsliding democracies
## (note: this step will take several minutes)
results_1b <- df_bootstrapped %>%
  group_by(draws, attribute, level, dem_backdem) %>% 
  summarize(average = mean(selected)) %>%
  ungroup() %>% 
  mutate(salience = abs(average - 0.5)) %>% 
  group_by(draws, dem_backdem, attribute) %>% 
  summarize(average_salience = mean(salience)) %>% 
  ungroup() %>% 
  spread(dem_backdem, average_salience) %>% 
  mutate(diff = (`1` - `0`)) %>% 
  group_by(attribute) %>% 
  summarize(difference_mean = mean(diff),
            difference_bs.low = quantile(diff, 0.05 / 2),
            difference_bs.high = quantile(diff, 1 - 0.05 / 2)) %>%
  ungroup() %>% 
  mutate(sig = ifelse((difference_bs.low > 0 & difference_bs.high > 0) | 
                        (difference_bs.low < 0 & difference_bs.high < 0), 1, 0))

## Calculate salience scores for democracies vs. backsliding democracies
results_1a <- df_salience %>% 
  group_by(attribute, level, dem_backdem) %>% 
  summarize(average = mean(selected)) %>%
  ungroup() %>% 
  mutate(deviation = abs(average - 0.5)) %>% 
  group_by(attribute, dem_backdem) %>% 
  summarize(salience = mean(deviation)) %>% 
  ungroup() %>% 
  spread(dem_backdem, salience) %>% 
  left_join(results_1b %>% select(attribute, sig), by = "attribute")

## Visualize attribute salience for democracies vs. backsliding democracies (Figure S10)
p_results_1a <- ggplot(data = results_1a, aes(x = `0`, y = `1`)) +
  geom_abline(slope = 1, color = "gray80") +
  geom_point() +
  geom_text_repel(aes(label = attribute), family = "Times") +
  coord_equal(ylim = c(0, 0.10), xlim = c(0, 0.10)) +
  scale_x_continuous(breaks =  c(0, 0.05, 0.10)) +
  scale_y_continuous(breaks = c(0, 0.05, 0.10)) +
  xlab("Attribute Salience in Backsliding Democracies\n(India, United States)") +
  ylab("Attribute Salience in Democracies\n(Italy, Japan)") +
  theme_bw(base_size = 12, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 12, colour = "black", hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 12, colour = "black", hjust = 1, vjust = .5),
    axis.ticks = element_line(colour = "grey50"),
    strip.text.x = element_text(size = 12)
  )

p_results_1b <- ggplot(data = results_1b) +
  geom_hline(yintercept = 0, color = "gray60", linetype = "dashed") +
  geom_pointrange(aes(x = reorder(attribute, difference_mean),
                      y = difference_mean,
                      ymin = difference_bs.low,
                      ymax = difference_bs.high),
                  size = 0.3) + 
  coord_flip(ylim = c(-0.06, 0.06)) +
  labs(y = "Difference in Attribute Salience\n(Democracies - Backsliding Democracies)", x = NULL) +
  theme_bw(base_size = 12, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 12, colour = "black", hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 12, colour = "black", hjust = 1, vjust = .5),
    axis.ticks = element_line(colour = "grey50"),
    strip.text.x = element_text(size = 12),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

plot_grid(p_results_1a, p_results_1b, labels = "AUTO", nrow = 1, 
          label_fontfamily = "Times", label_size = 13)
ggsave("Figure S11.pdf", width = 10, height = 5)

## Calculate the difference-in-salience scores for democracies vs. nondemocracies
## (note: this step will take several minutes)
results_2b <- df_bootstrapped %>%
  group_by(draws, attribute, level, dem_nondem) %>% 
  summarize(average = mean(selected)) %>%
  ungroup() %>% 
  mutate(salience = abs(average - 0.5)) %>% 
  group_by(draws, dem_nondem, attribute) %>% 
  summarize(average_salience = mean(salience)) %>% 
  ungroup() %>% 
  spread(dem_nondem, average_salience) %>% 
  mutate(diff = (`1` - `0`)) %>% 
  group_by(attribute) %>% 
  summarize(difference_mean = mean(diff),
            difference_bs.low = quantile(diff, 0.05 / 2),
            difference_bs.high = quantile(diff, 1 - 0.05 / 2)) %>%
  ungroup() %>% 
  mutate(sig = ifelse((difference_bs.low > 0 & difference_bs.high > 0) | 
                        (difference_bs.low < 0 & difference_bs.high < 0), 1, 0))

## Calculate salience scores for democracies vs. nondemocracies
results_2a <- df_salience %>% 
  group_by(attribute, level, dem_nondem) %>% 
  summarize(average = mean(selected)) %>%
  ungroup() %>% 
  mutate(deviation = abs(average - 0.5)) %>% 
  group_by(attribute, dem_nondem) %>% 
  summarize(salience = mean(deviation)) %>% 
  ungroup() %>% 
  spread(dem_nondem, salience) %>% 
  left_join(results_2b %>% select(attribute, sig), by = "attribute")

## Visualize attribute salience for democracies vs. nondemocracies (Figure S11)
p_results_2a <- ggplot(data = results_2a, aes(x = `0`, y = `1`)) +
  geom_abline(slope = 1, color = "gray80") +
  geom_point() +
  geom_text_repel(aes(label = attribute), family = "Times") +
  coord_equal(ylim = c(0, 0.10), xlim = c(0, 0.10)) +
  scale_x_continuous(breaks =  c(0, 0.05, 0.10)) +
  scale_y_continuous(breaks = c(0, 0.05, 0.10)) +
  xlab("Attribute Salience in Nondemocracies\n(Egypt, Thailand)") +
  ylab("Attribute Salience in Democracies\n(Italy, Japan)") +
  theme_bw(base_size = 12, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 12, colour = "black", hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 12, colour = "black", hjust = 1, vjust = .5),
    axis.ticks = element_line(colour = "grey50"),
    strip.text.x = element_text(size = 12)
  )

p_results_2b <- ggplot(data = results_2b) +
  geom_hline(yintercept = 0, color = "gray60", linetype = "dashed") +
  geom_pointrange(aes(x = reorder(attribute, difference_mean),
                      y = difference_mean,
                      ymin = difference_bs.low,
                      ymax = difference_bs.high),
                  size = 0.3) + 
  coord_flip(ylim = c(-0.06, 0.06)) +
  labs(y = "Difference in Attribute Salience\n(Democracies - Nondemocracies)", x = NULL) +
  theme_bw(base_size = 12, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 12, colour = "black", hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 12, colour = "black", hjust = 1, vjust = .5),
    axis.ticks = element_line(colour = "grey50"),
    strip.text.x = element_text(size = 12),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

plot_grid(p_results_2a, p_results_2b, labels = "AUTO", nrow = 1, 
          label_fontfamily = "Times", label_size = 13)
ggsave("Figure S12.pdf", width = 10, height = 5)

## Calculate the difference-in-salience scores for nondemocracies vs. backsliding democracies
## (note: this step will take several minutes)
results_3b <- df_bootstrapped %>%
  group_by(draws, attribute, level, nondem_backdem) %>% 
  summarize(average = mean(selected)) %>%
  ungroup() %>% 
  mutate(salience = abs(average - 0.5)) %>% 
  group_by(draws, nondem_backdem, attribute) %>% 
  summarize(average_salience = mean(salience)) %>% 
  ungroup() %>% 
  spread(nondem_backdem, average_salience) %>% 
  mutate(diff = (`1` - `0`)) %>% 
  group_by(attribute) %>% 
  summarize(difference_mean = mean(diff),
            difference_bs.low = quantile(diff, 0.05 / 2),
            difference_bs.high = quantile(diff, 1 - 0.05 / 2)) %>%
  ungroup() %>% 
  mutate(sig = ifelse((difference_bs.low > 0 & difference_bs.high > 0) | 
                        (difference_bs.low < 0 & difference_bs.high < 0), 1, 0))

## Calculate salience scores for nondemocracies vs. backsliding democracies
results_3a <- df_salience %>% 
  group_by(attribute, level, nondem_backdem) %>% 
  summarize(average = mean(selected)) %>%
  ungroup() %>% 
  mutate(deviation = abs(average - 0.5)) %>% 
  group_by(attribute, nondem_backdem) %>% 
  summarize(salience = mean(deviation)) %>% 
  ungroup() %>% 
  spread(nondem_backdem, salience) %>% 
  left_join(results_3b %>% select(attribute, sig), by = "attribute")

## Visualize attribute salience for nondemocracies vs. backsliding democracies (Figure S12)
p_results_3a <- ggplot(data = results_3a, aes(x = `0`, y = `1`)) +
  geom_abline(slope = 1, color = "gray80") +
  geom_point() +
  geom_text_repel(aes(label = attribute), family = "Times") +
  coord_equal(ylim = c(0, 0.10), xlim = c(0, 0.10)) +
  scale_x_continuous(breaks =  c(0, 0.05, 0.10)) +
  scale_y_continuous(breaks = c(0, 0.05, 0.10)) +
  xlab("Attribute Salience in Backsliding Democracies\n(India, United States)") +
  ylab("Attribute Salience in Nondemocracies\n(Egypt, Thailand)") +
  theme_bw(base_size = 12, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 12, colour = "black", hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 12, colour = "black", hjust = 1, vjust = .5),
    axis.ticks = element_line(colour = "grey50"),
    strip.text.x = element_text(size = 12)
  )

p_results_3b <- ggplot(data = results_3b) +
  geom_hline(yintercept = 0, color = "gray60", linetype = "dashed") +
  geom_pointrange(aes(x = reorder(attribute, difference_mean),
                      y = difference_mean,
                      ymin = difference_bs.low,
                      ymax = difference_bs.high),
                  size = 0.3) + 
  coord_flip(ylim = c(-0.06, 0.06)) +
  labs(y = "Difference in Attribute Salience\n(Nondemocracies - Backsliding Democracies)", x = NULL) +
  theme_bw(base_size = 12, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 12, colour = "black", hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 12, colour = "black", hjust = 1, vjust = .5),
    axis.ticks = element_line(colour = "grey50"),
    strip.text.x = element_text(size = 12),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

plot_grid(p_results_3a, p_results_3b, labels = "AUTO", nrow = 1, 
          label_fontfamily = "Times", label_size = 13)
ggsave("Figure S13.pdf", width = 10, height = 5)

### Figure 1B: attribute salience with the six-country sample ----
## Rename attributes
df_salience$attribute <- 
  case_match(df_salience$attribute, 
             "Electoral" ~ "Electoral\nDemocracy",
             "Liberal" ~ "Liberal\nDemocracy",
             "Institutional" ~ "Institutional\nDemocracy",
             "Populist" ~ "Populist\nDemocracy",
             "Loyalist" ~ "Loyalist\nDemocracy",
             "Economic" ~ "Substantive\nDemocracy\n(Economy)",
             "Gender" ~ "Substantive\nDemocracy\n(Gender)",
             "Technocratic" ~ "Technocratic\nDemocracy",
             "Direct" ~ "Direct\nDemocracy")

## Calculate salience scores
salience <- df_salience %>% 
  group_by(attribute, level) %>% 
  summarize(average = mean(selected)) %>%
  ungroup() %>% 
  mutate(deviation = abs(average - 0.5)) %>% 
  group_by(attribute) %>% 
  summarize(salience_mean = mean(deviation))

## Visualize the estimates
salience$attribute <- 
  factor(salience$attribute,
         levels = c("Electoral\nDemocracy", 
                    "Liberal\nDemocracy", 
                    "Institutional\nDemocracy", 
                    "Populist\nDemocracy", 
                    "Loyalist\nDemocracy",
                    "Substantive\nDemocracy\n(Economy)", 
                    "Substantive\nDemocracy\n(Gender)", 
                    "Technocratic\nDemocracy", 
                    "Direct\nDemocracy"))

p_salience <- ggplot(data = salience) +
  geom_point(aes(x = fct_rev(attribute), y = salience_mean),
             size = 2, shape = 8) + 
  coord_flip(ylim = c(0, 0.07)) +
  labs(y = "Attribute Salience", x = NULL) +
  theme_bw(base_size = 10, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 9, colour = "black", hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 9, colour = "black", face = "bold", hjust = 1, vjust = .5),
    axis.ticks = element_line(colour = "grey50"),
    strip.text.x = element_text(size = 9),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

## Combine the plot with the aggregate AMCE plot
p_amce <- p_amce + ggtitle("Estimated AMCEs") +
  theme(plot.title = element_text(hjust = 0.5, size = 11, face = "italic"))
p_salience <- p_salience + ggtitle("Attribute Salience") + 
  theme(plot.title = element_text(hjust = 0.5, size = 11, face = "italic"))
plot_grid(p_amce, p_salience, labels = "AUTO", nrow = 1, 
          label_fontfamily = "Times", label_size = 11,
          rel_widths = c(2, 1))
ggsave("Figure 1.pdf", width = 8, height = 6)

### Table S5: AMCEs based on logistic regression ----
## Unweighted
design <- survey::svydesign(
  ids = ~id,
  weights = ~1,
  data = df_cj
)

model_logit <- survey::svyglm(
  data = df_cj, 
  formula = as.formula(paste("selected ~", attributes)),
  design = design,
  family = binomial(link = "logit")
)

mod1 <- model_logit %>% marginaleffects::avg_slopes(newdata = "mean")

## Weighted: Egypt
# Set the benchmark
df_EG <- reorder_levels(df_EG)
df_EG_unweighted <- svydesign(ids = ~id, data = df_EG)
EG_dist <- data.frame(edu_bin = c("No College", "College"),
                      freq = nrow(df_EG_unweighted) * c(1 - 0.0617, 0.0617)) # data from USAID

# Compute and store the weights
df_EG_rake <- rake(design = df_EG_unweighted,
                   sample.margins = list(~edu_bin),
                   population.margins = list(EG_dist))
df_EG$weight <- weights(df_EG_rake)

# Run the logistic regression
design <- survey::svydesign(
  ids = ~id,
  weights = ~weight,
  data = df_EG
)

model_logit <- survey::svyglm(
  data = df_EG, 
  formula = as.formula(paste("selected ~", attributes)),
  design = design,
  family = quasibinomial(link = "logit") # not binomial because the weights are non-integral
)

mod2 <- model_logit %>% marginaleffects::avg_slopes(newdata = "mean")

## Weighted: India
# Set the benchmark
df_IN <- reorder_levels(df_IN)
df_IN_unweighted <- svydesign(ids = ~id, data = df_IN)
IN_dist <- data.frame(edu_bin = c("No College", "College"),
                      freq = nrow(df_IN_unweighted) * c(1 - 0.1214, 0.1214)) # data from USAID

# Compute and store the weights
df_IN_rake <- rake(design = df_IN_unweighted,
                   sample.margins = list(~edu_bin),
                   population.margins = list(IN_dist))
df_IN$weight <- weights(df_IN_rake)

# Run the logistic regression
design <- survey::svydesign(
  ids = ~id,
  weights = ~weight,
  data = df_IN
)

model_logit <- survey::svyglm(
  data = df_IN, 
  formula = as.formula(paste("selected ~", attributes)),
  design = design,
  family = quasibinomial(link = "logit") # not binomial because the weights are non-integral
)

mod3 <- model_logit %>% marginaleffects::avg_slopes(newdata = "mean")

## Weighted: Italy
# Set the benchmark
df_IT <- reorder_levels(df_IT)
df_IT_unweighted <- svydesign(ids = ~id, data = df_IT)
IT_dist <- data.frame(edu_bin = c("No College", "College"),
                      freq = nrow(df_IT_unweighted) * c(1 - 0.1655, 0.1655)) # data from USAID

# Compute and store the weights
df_IT_rake <- rake(design = df_IT_unweighted,
                   sample.margins = list(~edu_bin),
                   population.margins = list(IT_dist))
df_IT$weight <- weights(df_IT_rake)

# Run the logistic regression
design <- survey::svydesign(
  ids = ~id,
  weights = ~weight,
  data = df_IT
)

model_logit <- survey::svyglm(
  data = df_IT, 
  formula = as.formula(paste("selected ~", attributes)),
  design = design,
  family = quasibinomial(link = "logit") # not binomial because the weights are non-integral
)

mod4 <- model_logit %>% marginaleffects::avg_slopes(newdata = "mean")

## Weighted: Japan
# Set the benchmark
df_JP <- reorder_levels(df_JP)
df_JP_unweighted <- svydesign(ids = ~id, data = df_JP)
JP_dist <- data.frame(edu_bin = c("No College", "College"),
                      freq = nrow(df_JP_unweighted) * c(1 - 0.2554, 0.2554)) # data from USAID

# Compute and store the weights
df_JP_rake <- rake(design = df_JP_unweighted,
                   sample.margins = list(~edu_bin),
                   population.margins = list(JP_dist))
df_JP$weight <- weights(df_JP_rake)

# Run the logistic regression
design <- survey::svydesign(
  ids = ~id,
  weights = ~weight,
  data = df_JP
)

model_logit <- survey::svyglm(
  data = df_JP, 
  formula = as.formula(paste("selected ~", attributes)),
  design = design,
  family = quasibinomial(link = "logit") # not binomial because the weights are non-integral
)

mod5 <- model_logit %>% marginaleffects::avg_slopes(newdata = "mean")

## Weighted: Thailand
# Set the benchmark
df_TH <- reorder_levels(df_TH)
df_TH_unweighted <- svydesign(ids = ~id, data = df_TH)
TH_dist <- data.frame(edu_bin = c("No College", "College"),
                      freq = nrow(df_TH_unweighted) * c(1 - 0.1563, 0.1563)) # data from USAID

# Compute and store the weights
df_TH_rake <- rake(design = df_TH_unweighted,
                   sample.margins = list(~edu_bin),
                   population.margins = list(TH_dist))
df_TH$weight <- weights(df_TH_rake)

# Run the logistic regression
design <- survey::svydesign(
  ids = ~id,
  weights = ~weight,
  data = df_TH
)

model_logit <- survey::svyglm(
  data = df_TH, 
  formula = as.formula(paste("selected ~", attributes)),
  design = design,
  family = quasibinomial(link = "logit") # not binomial because the weights are non-integral
)

mod6 <- model_logit %>% marginaleffects::avg_slopes(newdata = "mean")

## Weighted: United States
# Set the benchmark
df_US <- reorder_levels(df_US)
df_US_unweighted <- svydesign(ids = ~id, data = df_US)
US_dist <- data.frame(edu_bin = c("No College", "College"),
                      freq = nrow(df_US_unweighted) * c(1 - 0.3505, 0.3505)) # data from USAID

# Compute and store the weights
df_US_rake <- rake(design = df_US_unweighted,
                   sample.margins = list(~edu_bin),
                   population.margins = list(US_dist))
df_US$weight <- weights(df_US_rake)

# Run the logistic regression
design <- survey::svydesign(
  ids = ~id,
  weights = ~weight,
  data = df_US
)

model_logit <- survey::svyglm(
  data = df_US, 
  formula = as.formula(paste("selected ~", attributes)),
  design = design,
  family = quasibinomial(link = "logit") # not binomial because the weights are non-integral
)

mod7 <- model_logit %>% marginaleffects::avg_slopes(newdata = "mean")

# Tabulate the average marginal effects from each weighted logistic regression
texreg(list(mod1, mod2, mod3, mod4, mod5, mod6, mod7),
       stars = numeric(0),
       custom.model.names = c("Aggregate", "Egypt", "India", "Italy", "Japan", "Thailand", "USA"),
       custom.coef.names = c("Liberal High", "Liberal Medium", "Direct High", "Direct Medium",
                             "Economic High", "Economic Medium", "Electoral Medium", "Electoral High",
                             "Technocratic High", "Technocratic Medium", "Gender High", "Gender Medium",
                             "Institutional High", "Institutional Medium", "Loyalist High",
                             "Loyalist Medium", "Populist High", "Populist Medium"),
       reorder.coef = c(8, 7, 1, 2, 13, 14, 17, 18, 15, 16, 5, 6, 11, 12, 9, 10, 3, 4))
